#!/usr/bin/env python3

# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Output devstats repo_groups.sql based on subproject definitions in sigs.yaml

This is likely missing a few repos because:
    - some repos lack an owner (eg: kubernetes/kubernetes)
    - it doesn't enumerate all repos from all kubernetes-owned orgs
    - it only groups by sig and committee; repos owned by any other kind of
      group are not picked up

The sql generated is NOT intended to overwrite/replace the file that lives at
github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql, but instead aid a
human in doing some manual updates to the file. Future improvements to this
script could eliminate that part of the process, but it's where we are today.
"""

import argparse
import ruamel.yaml as yaml
import json
import re
import sys

# SQL statement emitted once per repo group by write_repo_groups_template;
# the first placeholder receives the repo group name and the second receives
# the comma-separated list of quoted repo names, one per line
repo_group_sql_template = """
update gha_repos set repo_group = '{}' where name in (
{}
);
"""

# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
# if this differs, consider cncf the authoritative source and update this
#
# written verbatim at the very top of the generated sql, before any of the
# per-group update statements
repo_groups_sql_header = """-- generated by github.com/kubernetes/community/hack/generate-devstats-repo-sql.py
-- Add repository groups
"""

# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
# if this differs, consider cncf the authoritative source and update this
#
# written verbatim at the very end of the generated sql, after all of the
# per-group update statements; it sets repo aliases and then reports how many
# repos ended up in each repo group
repo_groups_sql_footer = """
-- All other unknown repositories should have 'Other' repository group
-- update gha_repos set repo_group = 'Other' where repo_group is null;

-- By default alias is the newest repo name for given repo ID
update
  gha_repos r
set
  alias = coalesce((
    select e.dup_repo_name
    from
      gha_events e
    where
      e.repo_id = r.id
    order by
      e.created_at desc
    limit 1
  ), name)
;

update gha_repos set alias = 'kubernetes/kubernetes' where name like '%kubernetes' or name = 'kubernetes/';

select
  repo_group,
  count(*) as number_of_repos
from
  gha_repos
where
  repo_group is not null
group by
  repo_group
order by
  number_of_repos desc,
  repo_group asc;

"""

# repo groups that are not derived from sigs.yaml; they are written out
# before the sig and committee groups. Each entry carries the repo group
# 'name' and the list of 'repos' (repo names) assigned to it.
special_case_groups = [{
    # the main repo has no single owner and has gone by many names
    'name': 'Kubernetes',
    'repos': [
        'kubernetes/kubernetes',
        'GoogleCloudPlatform/kubernetes',
        'kubernetes',
        'kubernetes/'
    ]
}]

# devstats isn't aware of repo renames or migrations; we need to keep
# old repo names in its sql groups present for historical purposes;
#
# when reconciling deletions from repo_groups.sql by this script, use
# github.com/kubernetes/org issues to determine why; renamed, migrated,
# or used-and-retired repos belong here; unused/deleted repos do not
#
# keys are looked up using a group's 'dir' value; the listed repo names are
# merged into the repos discovered for that group
renamed_repos = {
    'sig-architecture': [
        'kubernetes/contrib',
    ],
    'sig-api-machinery': [
        'kubernetes-incubator/apiserver-builder',
    ],
    'sig-cluster-lifecycle': [
        'kubernetes-incubator/kubespray',
    ],
    'sig-multicluster': [
        'kubernetes-sigs/federation-v2',
    ],
    'sig-node': [
        'kubernetes-incubator/node-feature-discovery',
    ],
    'sig-pm': [
        'kubernetes/features',
    ],
    'sig-service-catalog': [
        'kubernetes-incubator/service-catalog',
    ]
}

def repos_from_k8s_group(k8s_group):
    """Return the sorted org/repo names owned by a kubernetes community group.

    A repo counts as owned by the group when one of the group's subprojects
    lists that repo's top-level OWNERS file, i.e. an owners entry of the form
    https://raw.githubusercontent.com/<org>/<repo>/<branch>/OWNERS where
    <branch> is ``master`` or ``main``. Entries that already look like
    <org>/<repo>/OWNERS are accepted as-is. OWNERS files nested deeper inside
    a repo do not make the group an owner of the whole repo and are skipped,
    as are entries too short to name a repo at all.

    Args:
        k8s_group: mapping describing one group; only its 'subprojects' key
            is read, and it may be missing or None. Each subproject's
            'owners' list may likewise be missing or None.

    Returns:
        A sorted list of unique 'org/repo' strings.
    """
    # Collapses a raw.githubusercontent.com URI to org/repo/path by dropping
    # the host and the branch segment; compiled once, outside the loops.
    owners_uri = re.compile(
        r"https://raw\.githubusercontent\.com/(.*)/(?:master|main)/(.*)")
    repos = set()
    for sp in k8s_group.get('subprojects') or []:
        for uri in sp.get('owners') or []:
            path_parts = owners_uri.sub(r"\1/\2", uri).split('/')
            # org/repo is owned by k8s_group if org/repo/OWNERS is in one of
            # their subprojects; the length guard keeps malformed (too short)
            # entries from raising IndexError
            if len(path_parts) > 2 and path_parts[2] == 'OWNERS':
                repos.add('/'.join(path_parts[:2]))
    return sorted(repos)

def k8s_group_name(k8s_group):
    """Return the repo group label for a kubernetes community group.

    The label is chosen from the prefix of the group's 'dir' value:
    'sig-' gives "SIG <name>", 'committee-' gives "<name> Committee", and
    anything else (including a missing 'dir') gives "UNKNOWN <dir>".
    """
    directory = k8s_group.get('dir', '')
    if directory.startswith('sig-'):
        label = "SIG " + k8s_group['name']
    elif directory.startswith('committee-'):
        label = k8s_group['name'] + " Committee"
    else:
        # 'name' is deliberately not consulted for unrecognised groups
        label = "UNKNOWN " + directory
    return label

def write_repo_groups_template(name, repos, fp):
    """Write one repo group update statement to fp.

    Fills repo_group_sql_template with the group name and the repo names,
    each single-quoted, indented by two spaces and separated by ",\\n".
    Nothing is written when repos is empty.
    """
    if len(repos) == 0:
        return
    quoted_names = ['  \'{}\''.format(repo) for repo in repos]
    statement = repo_group_sql_template.format(name, ',\n'.join(quoted_names))
    fp.write(statement)

def write_repo_groups_sql(k8s_groups, fp):
    """Write the whole generated sql document to fp.

    Output order: the header, one statement per special-case group, one
    statement per group listed under 'sigs' and then 'committees' in
    k8s_groups, and finally the footer. Each sig/committee statement covers
    the repos discovered from its subprojects merged with any historical
    names recorded in renamed_repos for the group's 'dir'.
    """
    fp.write(repo_groups_sql_header)

    for special in special_case_groups:
        write_repo_groups_template(special['name'], special['repos'], fp)

    for kind in ('sigs', 'committees'):
        for group in k8s_groups[kind]:
            discovered = repos_from_k8s_group(group)
            historical = renamed_repos.get(group['dir'], [])
            # de-duplicate across both sources, then order deterministically
            merged = sorted(set(discovered).union(historical))
            write_repo_groups_template(k8s_group_name(group), merged, fp)

    fp.write(repo_groups_sql_footer)

def main(sigs_yaml, repo_groups_sql):
    """Load sigs.yaml and emit the generated repo groups sql.

    Args:
        sigs_yaml: path of the sigs.yaml file to read.
        repo_groups_sql: path of the sql file to write (it is overwritten),
            or None to write the sql to stdout instead.
    """
    # The module-level round_trip_load() helper is deprecated and no longer
    # usable in ruamel.yaml 0.18+; the YAML class is the supported API and
    # typ='rt' selects the same round-trip loader.
    with open(sigs_yaml, encoding='utf-8') as fp:
        k8s_groups = yaml.YAML(typ='rt').load(fp)

    if repo_groups_sql is not None:
        with open(repo_groups_sql, 'w', encoding='utf-8') as fp:
            write_repo_groups_sql(k8s_groups, fp)
    else:
        write_repo_groups_sql(k8s_groups, sys.stdout)

if __name__ == '__main__':
    # Command-line entry point; both options are optional.
    PARSER = argparse.ArgumentParser(
        description='Generate a repo_groups.sql intended for github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql')
    PARSER.add_argument('--sigs-yaml',
                        help='Path to sigs.yaml',
                        default='./sigs.yaml')
    # No default here: when the flag is omitted the value is None
    PARSER.add_argument('--repo-groups-sql',
                        help='Path to output repo_groups.sql if provided')
    ARGS = PARSER.parse_args()

    main(sigs_yaml=ARGS.sigs_yaml, repo_groups_sql=ARGS.repo_groups_sql)

